
# ==============================================================================
# file name: SI-sample-balance.R
# authors: Bernhard Clemm 
# date: March 18, 2022
# purpose: Build sample balance and differential attrition tables (US, Poland)
# ==============================================================================

# SETUP ========================================================================

# Project root: two directory levels above the script that is currently open
# in the RStudio source editor. All paths keep a trailing slash.
script_path <- rstudioapi::getSourceEditorContext()$path
basedir <- paste0(dirname(dirname(script_path)), "/")
codedir <- paste0(basedir, "code/")
datadir <- paste0(basedir, "data/")

library(tidyverse)
library(kableExtra)

# Load the processed data set (data/processed/data_wide.csv).
data_wide <- read.csv(file = paste0(datadir, "processed/data_wide.csv"))

# US =================================================================

# US subsamples, split by experimental arm.
# NOTE(review): condition_num == 1 is taken to be treatment and 0 control,
# as implied by the object names -- confirm against the codebook.

# Respondents with a non-missing `consented` value
data_wide_us_consented_treat <- filter(
  data_wide, country == "US", !is.na(consented), condition_num == 1
)
data_wide_us_consented_cont <- filter(
  data_wide, country == "US", !is.na(consented), condition_num == 0
)

# Respondents with a non-missing post-wave respondent id
data_wide_us_post_treat <- filter(
  data_wide, country == "US", !is.na(respondent_id_post), condition_num == 1
)
data_wide_us_post_cont <- filter(
  data_wide, country == "US", !is.na(respondent_id_post), condition_num == 0
)

# Balance table (US): percentage share of every gender, age and education
# category in each of the four subsamples (accepted participation / post-wave
# by control / treatment), rounded to two decimals.
balance_us <- local({
  # Percentage distribution of the three covariates within one subsample,
  # stacked in long format. `share_col` names the column holding the
  # percentages; `variable` labels the covariate each row belongs to.
  category_shares <- function(df, share_col) {
    shares <- rbind(
      data.frame(prop.table(table(df$gender_cat)) * 100, variable = "Gender"),
      data.frame(prop.table(table(df$age_cat)) * 100, variable = "Age"),
      data.frame(prop.table(table(df$edu_cat)) * 100, variable = "Education")
    )
    # data.frame() on a one-way table yields the columns Var1 and Freq
    names(shares)[names(shares) == "Var1"] <- "value"
    names(shares)[names(shares) == "Freq"] <- share_col
    shares
  }

  # Join on covariate AND category: the key is explicit, and a category label
  # that occurs in two covariates cannot cross-match and duplicate rows.
  keys <- c("variable", "value")

  category_shares(data_wide_us_consented_treat, "consented_treat") %>%
    left_join(
      category_shares(data_wide_us_consented_cont, "consented_cont"),
      by = keys
    ) %>%
    left_join(
      category_shares(data_wide_us_post_treat, "post_treat"),
      by = keys
    ) %>%
    left_join(
      category_shares(data_wide_us_post_cont, "post_cont"),
      by = keys
    ) %>%
    # Anonymous function rather than across(..., round, 2): forwarding extra
    # arguments through across() is deprecated since dplyr 1.1.0.
    mutate(across(where(is.numeric), function(x) round(x, 2))) %>%
    select(variable, value,
           consented_cont, consented_treat, post_cont, post_treat)
})

# Add significance tests

# Chi-squared test of whether a categorical covariate is distributed equally
# in two subsamples; returns the p-value rounded to three decimals.
#   sample_a, sample_b: data frames holding the two subsamples
#   covariate:          name of the categorical column to compare
# The contingency table is built from the raw responses, so categories are
# matched by label. Stacking two separate table() results with rbind() matches
# them by position instead, which misaligns (or recycles) counts whenever a
# category is missing from one of the subsamples.
chisq_p_value <- function(sample_a, sample_b, covariate) {
  categories <- c(as.character(sample_a[[covariate]]),
                  as.character(sample_b[[covariate]]))
  sample_id <- rep(c("a", "b"), times = c(nrow(sample_a), nrow(sample_b)))
  round(chisq.test(table(sample_id, categories))$p.value, 3)
}

# Treatment vs. control among respondents who accepted participation
p_consented_gender_us <- chisq_p_value(
  data_wide_us_consented_treat, data_wide_us_consented_cont, "gender_cat")
p_consented_age_us <- chisq_p_value(
  data_wide_us_consented_treat, data_wide_us_consented_cont, "age_cat")
p_consented_edu_us <- chisq_p_value(
  data_wide_us_consented_treat, data_wide_us_consented_cont, "edu_cat")

# Treatment vs. control among post-wave respondents
p_post_gender_us <- chisq_p_value(
  data_wide_us_post_treat, data_wide_us_post_cont, "gender_cat")
p_post_age_us <- chisq_p_value(
  data_wide_us_post_treat, data_wide_us_post_cont, "age_cat")
p_post_edu_us <- chisq_p_value(
  data_wide_us_post_treat, data_wide_us_post_cont, "edu_cat")

# Accepted participation vs. post-wave within the control group
p_cont_gender_us <- chisq_p_value(
  data_wide_us_consented_cont, data_wide_us_post_cont, "gender_cat")
p_cont_age_us <- chisq_p_value(
  data_wide_us_consented_cont, data_wide_us_post_cont, "age_cat")
p_cont_edu_us <- chisq_p_value(
  data_wide_us_consented_cont, data_wide_us_post_cont, "edu_cat")

# Accepted participation vs. post-wave within the treatment group
p_treat_gender_us <- chisq_p_value(
  data_wide_us_consented_treat, data_wide_us_post_treat, "gender_cat")
p_treat_age_us <- chisq_p_value(
  data_wide_us_consented_treat, data_wide_us_post_treat, "age_cat")
p_treat_edu_us <- chisq_p_value(
  data_wide_us_consented_treat, data_wide_us_post_treat, "edu_cat")

# Attach each p-value to every row of its covariate, then put the columns in
# the order in which the table is printed.
balance_us <- balance_us %>%
  mutate(
    sig_consented = case_when(
      variable == "Gender" ~ p_consented_gender_us,
      variable == "Age" ~ p_consented_age_us,
      variable == "Education" ~ p_consented_edu_us
    ),
    sig_post = case_when(
      variable == "Gender" ~ p_post_gender_us,
      variable == "Age" ~ p_post_age_us,
      variable == "Education" ~ p_post_edu_us
    ),
    sig_cont = case_when(
      variable == "Gender" ~ p_cont_gender_us,
      variable == "Age" ~ p_cont_age_us,
      variable == "Education" ~ p_cont_edu_us
    ),
    sig_treat = case_when(
      variable == "Gender" ~ p_treat_gender_us,
      variable == "Age" ~ p_treat_age_us,
      variable == "Education" ~ p_treat_edu_us
    )
  ) %>%
  select(variable, value,
         consented_cont, consented_treat, sig_consented,
         post_cont, post_treat, sig_post,
         sig_cont, sig_treat)

# LaTeX table of balance and differential attrition (US).
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
kable(balance_us,
      caption = "Balance and differential attrition (US)",
      format = "latex", booktabs = TRUE, escape = FALSE, linesep = "",
      row.names = FALSE,
      col.names = c("", "", "Control", "Treatment", "Sign.",
                    "Control", "Treatment", "Sign.",
                    "Sign. (control)", "Sign. (treatment)")) %>%
  # Column 1 (covariate label) and columns 5, 8, 9, 10 (p-values) repeat the
  # same value on every row of a covariate; merge those cells column by column.
  collapse_rows(columns = 1, valign = "middle", latex_hline = "major") %>%
  collapse_rows(columns = 5, valign = "middle", latex_hline = "major") %>%
  collapse_rows(columns = 8, valign = "middle", latex_hline = "major") %>%
  collapse_rows(columns = 9, valign = "middle", latex_hline = "major") %>%
  collapse_rows(columns = 10, valign = "middle", latex_hline = "major") %>%
  add_header_above(c(" " = 2, "Accepting participation" = 3, "Post-wave" = 3,
                     "Accepting vs. post-wave" = 2)) %>%
  # Vertical rule in front of the two attrition-test columns
  column_spec(9, border_left = TRUE)

# Poland ======================================================================

# Polish subsamples, split by experimental arm.
# NOTE(review): condition_num == 1 is taken to be treatment and 0 control,
# as implied by the object names -- confirm against the codebook.

# Respondents with a non-missing `consented` value
data_wide_pl_consented_treat <- filter(
  data_wide, country == "PL", !is.na(consented), condition_num == 1
)
data_wide_pl_consented_cont <- filter(
  data_wide, country == "PL", !is.na(consented), condition_num == 0
)

# Respondents with a non-missing post-wave respondent id
data_wide_pl_post_treat <- filter(
  data_wide, country == "PL", !is.na(respondent_id_post), condition_num == 1
)
data_wide_pl_post_cont <- filter(
  data_wide, country == "PL", !is.na(respondent_id_post), condition_num == 0
)

# Balance table (Poland): percentage share of every gender, age and education
# category in each of the four subsamples (accepted participation / post-wave
# by control / treatment), rounded to two decimals.
balance_pl <- local({
  # Percentage distribution of the three covariates within one subsample,
  # stacked in long format. `share_col` names the column holding the
  # percentages; `variable` labels the covariate each row belongs to.
  category_shares <- function(df, share_col) {
    shares <- rbind(
      data.frame(prop.table(table(df$gender_cat)) * 100, variable = "Gender"),
      data.frame(prop.table(table(df$age_cat)) * 100, variable = "Age"),
      data.frame(prop.table(table(df$edu_cat)) * 100, variable = "Education")
    )
    # data.frame() on a one-way table yields the columns Var1 and Freq
    names(shares)[names(shares) == "Var1"] <- "value"
    names(shares)[names(shares) == "Freq"] <- share_col
    shares
  }

  # Join on covariate AND category: the key is explicit, and a category label
  # that occurs in two covariates cannot cross-match and duplicate rows.
  keys <- c("variable", "value")

  category_shares(data_wide_pl_consented_treat, "consented_treat") %>%
    left_join(
      category_shares(data_wide_pl_consented_cont, "consented_cont"),
      by = keys
    ) %>%
    left_join(
      category_shares(data_wide_pl_post_treat, "post_treat"),
      by = keys
    ) %>%
    left_join(
      category_shares(data_wide_pl_post_cont, "post_cont"),
      by = keys
    ) %>%
    # Anonymous function rather than across(..., round, 2): forwarding extra
    # arguments through across() is deprecated since dplyr 1.1.0.
    mutate(across(where(is.numeric), function(x) round(x, 2))) %>%
    select(variable, value,
           consented_cont, consented_treat, post_cont, post_treat)
})

# Add significance tests

# Chi-squared test of whether a categorical covariate is distributed equally
# in two subsamples; returns the p-value rounded to three decimals.
#   sample_a, sample_b: data frames holding the two subsamples
#   covariate:          name of the categorical column to compare
# The contingency table is built from the raw responses, so categories are
# matched by label. Stacking two separate table() results with rbind() matches
# them by position instead, which misaligns (or recycles) counts whenever a
# category is missing from one of the subsamples.
chisq_p_value <- function(sample_a, sample_b, covariate) {
  categories <- c(as.character(sample_a[[covariate]]),
                  as.character(sample_b[[covariate]]))
  sample_id <- rep(c("a", "b"), times = c(nrow(sample_a), nrow(sample_b)))
  round(chisq.test(table(sample_id, categories))$p.value, 3)
}

# Treatment vs. control among respondents who accepted participation
p_consented_gender_pl <- chisq_p_value(
  data_wide_pl_consented_treat, data_wide_pl_consented_cont, "gender_cat")
p_consented_age_pl <- chisq_p_value(
  data_wide_pl_consented_treat, data_wide_pl_consented_cont, "age_cat")
p_consented_edu_pl <- chisq_p_value(
  data_wide_pl_consented_treat, data_wide_pl_consented_cont, "edu_cat")

# Treatment vs. control among post-wave respondents
p_post_gender_pl <- chisq_p_value(
  data_wide_pl_post_treat, data_wide_pl_post_cont, "gender_cat")
p_post_age_pl <- chisq_p_value(
  data_wide_pl_post_treat, data_wide_pl_post_cont, "age_cat")
p_post_edu_pl <- chisq_p_value(
  data_wide_pl_post_treat, data_wide_pl_post_cont, "edu_cat")

# Accepted participation vs. post-wave within the control group
p_cont_gender_pl <- chisq_p_value(
  data_wide_pl_consented_cont, data_wide_pl_post_cont, "gender_cat")
p_cont_age_pl <- chisq_p_value(
  data_wide_pl_consented_cont, data_wide_pl_post_cont, "age_cat")
p_cont_edu_pl <- chisq_p_value(
  data_wide_pl_consented_cont, data_wide_pl_post_cont, "edu_cat")

# Accepted participation vs. post-wave within the treatment group
p_treat_gender_pl <- chisq_p_value(
  data_wide_pl_consented_treat, data_wide_pl_post_treat, "gender_cat")
p_treat_age_pl <- chisq_p_value(
  data_wide_pl_consented_treat, data_wide_pl_post_treat, "age_cat")
p_treat_edu_pl <- chisq_p_value(
  data_wide_pl_consented_treat, data_wide_pl_post_treat, "edu_cat")

# Attach each p-value to every row of its covariate, then put the columns in
# the order in which the table is printed.
balance_pl <- balance_pl %>%
  mutate(
    sig_consented = case_when(
      variable == "Gender" ~ p_consented_gender_pl,
      variable == "Age" ~ p_consented_age_pl,
      variable == "Education" ~ p_consented_edu_pl
    ),
    sig_post = case_when(
      variable == "Gender" ~ p_post_gender_pl,
      variable == "Age" ~ p_post_age_pl,
      variable == "Education" ~ p_post_edu_pl
    ),
    sig_cont = case_when(
      variable == "Gender" ~ p_cont_gender_pl,
      variable == "Age" ~ p_cont_age_pl,
      variable == "Education" ~ p_cont_edu_pl
    ),
    sig_treat = case_when(
      variable == "Gender" ~ p_treat_gender_pl,
      variable == "Age" ~ p_treat_age_pl,
      variable == "Education" ~ p_treat_edu_pl
    )
  ) %>%
  select(variable, value,
         consented_cont, consented_treat, sig_consented,
         post_cont, post_treat, sig_post,
         sig_cont, sig_treat)

# LaTeX table of balance and differential attrition (Poland).
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
kable(balance_pl,
      caption = "Balance and differential attrition (Poland)",
      format = "latex", booktabs = TRUE, escape = FALSE, linesep = "",
      row.names = FALSE,
      col.names = c("", "", "Control", "Treatment", "Sign.",
                    "Control", "Treatment", "Sign.",
                    "Sign. (control)", "Sign. (treatment)")) %>%
  # Column 1 (covariate label) and columns 5, 8, 9, 10 (p-values) repeat the
  # same value on every row of a covariate; merge those cells column by column.
  collapse_rows(columns = 1, valign = "middle", latex_hline = "major") %>%
  collapse_rows(columns = 5, valign = "middle", latex_hline = "major") %>%
  collapse_rows(columns = 8, valign = "middle", latex_hline = "major") %>%
  collapse_rows(columns = 9, valign = "middle", latex_hline = "major") %>%
  collapse_rows(columns = 10, valign = "middle", latex_hline = "major") %>%
  add_header_above(c(" " = 2, "Accepting participation" = 3, "Post-wave" = 3,
                     "Accepting vs. post-wave" = 2)) %>%
  # Vertical rule in front of the two attrition-test columns
  column_spec(9, border_left = TRUE)
